{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "from IPython.core.interactiveshell import InteractiveShell\n",
    "# Show the value of every top-level expression in a cell, not only the last one\n",
    "InteractiveShell.ast_node_interactivity = 'all'  # default is 'last_expr'\n",
    "\n",
    "# Pick up edits to imported modules without restarting the kernel\n",
    "%load_ext autoreload\n",
    "%autoreload 2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "import json\n",
    "import os\n",
    "from collections import Counter\n",
    "\n",
    "from tqdm import tqdm\n",
    "import exiftool\n",
    "\n",
    "# ai4eutils is on path\n",
    "from path_utils import recursive_file_list"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Peace Parks Foundation images of humans\n",
    "\n",
    "\n",
    "Private dataset.\n",
    "\n",
    "The folder after the Day/Night level is the number of people present in the image.\n",
    "\n",
    "The location seems to be the number inside the brackets (), but these numbers are not the same as the ones recorded in the image footer. Image names without () are from location UMGR16 - these are given '1' as their location, which is otherwise unused. There is a small amount of inconsistency in the location labels in the (); for example, (15) seems to have changed viewshed in the 1-person daytime folder. However, the same viewshed does not seem to appear across different locations recorded in ().\n",
    "\n",
    "Timestamp is in EXIF. Height and width of image also from EXIF.\n",
    "\n",
    "Sequence info not present. Could extract from timestamp, but is not done here.\n",
    "\n",
    "AzCopy'ed the images to blob\n",
    "```\n",
    "Elapsed Time (Minutes): 9.2087\n",
    "Total Number Of Transfers: 4292\n",
    "```\n",
    "\n",
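    "The `image_id` is the file path relative to the dataset root, with the file extension removed and `/` replaced by `~`; it can still contain space chars (e.g. `Day~1~IMAG0154 (3)`). The `file_name` field is the path in blob, which also contains space chars."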
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Name stored in the 'dataset' field of every database entry\n",
    "dataset_name = 'peaceparks_201908_humans'\n",
    "\n",
    "# Local copy of the images; this directory is scanned recursively for .jpg files\n",
    "root_dir = '/Users/siyuyang/Source/temp_data/CameraTrap/PPF_humans/201908_humans/'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Every file below root_dir whose name ends in .jpg (extension matched case-insensitively)\n",
    "image_files = [path for path in recursive_file_list(root_dir) if path.lower().endswith('.jpg')]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "4291"
      ]
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(image_files)  # 4301 - 8 = 4293 files, two of which do not end with .jpg, leaving 4291"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Extract EXIF info and make the *embedded* database."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 4291/4291 [00:42<00:00, 100.72it/s]\n"
     ]
    }
   ],
   "source": [
    "# Build one embedded-database entry per image from its EXIF metadata.\n",
    "explicit_locations = set()  # location labels that were parsed from '(N)' in a file name\n",
    "image_items = []\n",
    "\n",
    "with exiftool.ExifTool() as et:\n",
    "    for image_path in tqdm(image_files):\n",
    "\n",
    "        try:\n",
    "            metadata = et.get_metadata(image_path)\n",
    "\n",
    "            # A missing EXIF tag raises KeyError, which is reported by the handler below\n",
    "            datetime_original = metadata['EXIF:DateTimeOriginal']\n",
    "            height = metadata['EXIF:ExifImageHeight']\n",
    "            width = metadata['EXIF:ExifImageWidth']\n",
    "\n",
    "            # Path of the image relative to root_dir, e.g. 'Day/1/IMAG0301.JPG'\n",
    "            file_name = os.path.relpath(image_path, root_dir)\n",
    "            # Drop the extension whatever its case (the file filter accepts .jpg as well as .JPG),\n",
    "            # then replace '/' with '~'\n",
    "            image_id = os.path.splitext(file_name)[0].replace('/', '~')\n",
    "\n",
    "            basename = os.path.basename(file_name)\n",
    "            if '(' in basename:\n",
    "                # Text between the first '(' and the following ')'; int() normalizes e.g. '03' to '3'\n",
    "                location = str(int(basename.split('(')[1].split(')')[0]))\n",
    "                explicit_locations.add(location)\n",
    "            else:\n",
    "                # File names without '(N)' are all assigned location '1'\n",
    "                location = '1'\n",
    "\n",
    "            image_items.append({\n",
    "                'image_id': image_id,\n",
    "                'file_name': file_name,\n",
    "                'dataset': dataset_name,\n",
    "\n",
    "                'width': width,\n",
    "                'height': height,\n",
    "                'datetime': datetime_original,\n",
    "                'location': location,\n",
    "\n",
    "                'annotations': {\n",
    "                    'species': ['human']\n",
    "                }\n",
    "\n",
    "            })\n",
    "\n",
    "        except Exception as e:\n",
    "            # Stop at the first failure; image_items is then incomplete and must not be saved as is\n",
    "            print('Exception with image {}! {}'.format(image_path, e))\n",
    "            break"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['10', '11', '12', '13', '14', '15', '2', '3', '4', '5', '6', '7', '8', '9']"
      ]
     },
     "execution_count": 41,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Labels are strings, so the order is lexicographic ('10' comes before '2')\n",
    "sorted(explicit_locations)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "4291"
      ]
     },
     "execution_count": 42,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(image_items)  # equals len(image_files) only if the extraction loop did not stop on an exception"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[{'annotations': {'species': ['human']},\n",
       "  'dataset': 'peaceparks_201908_humans',\n",
       "  'datetime': '2019:06:27 14:19:06',\n",
       "  'file_name': 'Day/1/IMAG0301.JPG',\n",
       "  'height': 2448,\n",
       "  'image_id': 'Day~1~IMAG0301',\n",
       "  'location': '1',\n",
       "  'width': 3264},\n",
       " {'annotations': {'species': ['human']},\n",
       "  'dataset': 'peaceparks_201908_humans',\n",
       "  'datetime': '2019:06:26 17:08:27',\n",
       "  'file_name': 'Day/1/IMAG0154 (3).JPG',\n",
       "  'height': 2448,\n",
       "  'image_id': 'Day~1~IMAG0154 (3)',\n",
       "  'location': '3',\n",
       "  'width': 3264}]"
      ]
     },
     "execution_count": 43,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "image_items[100:102]  # spot-check two entries"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Write all entries to a single JSON file\n",
    "output_path = '/Users/siyuyang/OneDrive - Microsoft/AI4Earth/CameraTrap/Databases/cosmos/peaceparks_201908_humans_20190812_embedded.json'\n",
    "with open(output_path, 'w') as out_file:\n",
    "    json.dump(image_items, out_file, indent=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python [conda env:tensorflow]",
   "language": "python",
   "name": "conda-env-tensorflow-py"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
